#!/usr/local/bin/python3
# 确认上面的python3路径是否正确
import os
import sys
import requests
from hashlib import md5
from functools import partial
from urllib.parse import urlencode
from multiprocessing.pool import Pool

PAGE_NUM = 5

HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.1.1 Safari/605.1.15',
    'X-Requested-With': 'XMLHttpRequest'
}

def get_page_json(offset, keyword):
    '''生成ajax请求，发出get请求，并获取响应结果，以字典的形式返回json数据
    :param offset: 页面请求偏移值
    :type offset: 能被20整除的整数
    :param keyword: 图片搜索的关键词
    :type keyword: unicode字符串
    :return: 响应数据
    :rtype: dict
    '''
    params = {
        'offset': offset,
        'format': 'json',
        'keyword': keyword,
        'autoload': 'true',
        'count': '20',
        'cur_tab': '3',
        'from': 'gallery'
    }

    url = 'https://www.toutiao.com/search_content/?' + urlencode(params)
    try:
        response = requests.get(url, headers=HEADERS)
        if response.status_code == 200:
            return response.json()
    except requests.ConnectionError as e:
        print('Error', e)

def parse_images_url(json):
    '''解析json字典，获得对应条目标题和图片链接
    :param json: 页面请求响应结果对应的字典数据
    :type json: dict
    '''
    data = json['data']
    if data:
        for item in data:
            title = item['title']
            images = item['image_list']
            if images:
                for image in images:
                    yield {
                        'image': 'http:' + image['url'].replace('list', 'origin'),
                        'title': title
                    }

def save_image_from(image_info, to_dir=''):
    '''根据链接获取图片，保存到标题命名的目录中，图片以md5码命名
    :param image_info: 包含标题和链接信息的字典数据
    :type image_info: dict
    :param to_dir: 要保存到的目录，默认值是空字符串，可指定目录，格式如: '狗狗'
    :type to_dir: unicode 字符串
    '''
    if image_info:
        print(image_info)
        image_dir = to_dir + '/' + image_info['title']
        if not os.path.exists(image_dir):
            os.makedirs(image_dir)
        try:
            response = requests.get(image_info['image'])
            if response.status_code == 200:
                image_path = '{0}/{1}.{2}'.format(image_dir, md5(response.content).hexdigest(), 'jpg')
                print(image_path)
                if not os.path.exists(image_path):
                    with open(image_path, 'wb') as f:
                        f.write(response.content)
                    print('图片下载完成！')
                else:
                    print('图片已下载 -> ', image_path)
        except requests.ConnectionError as e:
            print('图片下载失败！')
        except:
            print('出现异常！')

def main(offset, keyword):
    '''主函数，用于多进程调度
    :param offset: 页面链接offset参数的值
    :type offset: 能被20整除的整数
    :param keyword: 图片搜索关键词
    :type keyword: unicode字符串
    '''
    print('获取第', offset + 1, '～', offset + 20, '个条目...')
    json = get_page_json(offset, keyword)
    for image in parse_images_url(json):
        save_image_from(image, to_dir=keyword)

def get_help_info(command):
    '''生成帮助信息
    :param command: 命令的名称
    :type command: 字符串
    '''
    help_info = '\n使用:\n  ' + command + ' 关键字 请求个数\n'
    help_info += '  - 关键字: 图片搜索关键词\n  - 请求个数: 一次请求会获取20个图集的图片\n'
    help_info += '例如:\n  ' + command + ' 狗狗 5\n  -> 会获取100个图集中狗狗的图片\n'
    return help_info

def parse_argv(args=sys.argv):
    '''解析命令参数
    '''
    args_dict = {
        '-h': get_help_info(args[0]),
        '-k': '狗狗',
        '-n': PAGE_NUM
    }
    if len(args) != 3:
        print(args_dict['-h'])
        sys.exit(0)
    else:
        try:
            page_num = int(args[2])
            args_dict['-n'] = page_num if page_num <= 100 else 5
            args_dict['-k'] = args[1]
        except ValueError:
            print('请求个数有误，请输入整数')
            sys.exit(0)
        return args_dict

if __name__ == '__main__':
    args_dict = parse_argv()
    offset_list = [x * 20 for x in range(0, args_dict['-n'])]
    pool = Pool()
    partial_main = partial(main, keyword=args_dict['-k'])
    pool.map(partial_main, offset_list)
    pool.close()
    pool.join()
